package com.chance.cc.crawler.development.bootstrap.dahe;

import com.alibaba.fastjson.JSON;
import com.chance.cc.crawler.core.CrawlerEnum;
import com.chance.cc.crawler.core.downloader.HttpConfig;
import com.chance.cc.crawler.core.filter.FilterUtils;
import com.chance.cc.crawler.core.record.CrawlerRecord;
import com.chance.cc.crawler.core.record.CrawlerRequestRecord;
import com.chance.cc.crawler.development.controller.DevCrawlerController;
import org.apache.commons.lang3.StringUtils;

import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRecordFilter.key;
import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRequestType.*;

/**
 * Development bootstrap for the Dahe Net (dahe.cn) keyword-search crawler.
 *
 * <p>Each entry point assembles a start {@link CrawlerRequestRecord} for the {@code dahe}
 * domain and hands it to a {@link DevCrawlerController} configured with a console result
 * pipeline.
 *
 * <p>Created 2021/3/12 10:57.
 *
 * @author Zhao.Hhuan
 */
public class DaHe {
    /** Crawler domain id; reused for biz tags, HTTP config, trigger info and the dev request queue. */
    private static final String DOMAIN = "dahe";

    /** Site biz tag attached to every start record built here. */
    private static final String SITE = "searchKw";

    /** Site-biz biz tag attached to every start record built here. */
    private static final String SITE_BIZ = "forum";

    /**
     * First argument passed to {@code FilterUtils.dateRangeFilterInfo}.
     * NOTE(review): presumably hours, i.e. a 7-day window - confirm against FilterUtils.
     */
    private static final int DATE_RANGE_FILTER_SPAN = 24 * 7;

    /** Category-tag key under which the serialized comment filter record is stored. */
    private static final String COMMENT_FILTER_TAG = "comment_filter_record";

    /**
     * Runs the single-keyword search crawl.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Dev toggle: call keywordRecord() here to run the keyword-list variant.
//        keywordRecord();
        keywordItemRecord();
    }

    /**
     * Starts a crawl whose start record is paired with a support record that requests the
     * keyword list from the internal meta service.
     *
     * <p>The start record itself is marked as not downloaded, not parsed, not washed and
     * skipping the pipeline; it carries the search URL template in the
     * {@code searchKwSourceUrl} request extra.
     * NOTE(review): presumably the dahe crawler script fills the template with each keyword
     * returned by the support record - confirm against the script.
     */
    private static void keywordRecord() {
        CrawlerRequestRecord requestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN, turnPageItem)
                .domain(DOMAIN)
                .httpUrl("https://s.dahe.cn/")
                .httpConfig(HttpConfig.me(DOMAIN))
                .filter(CrawlerEnum.CrawlerRecordFilter.dateRange)
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(DATE_RANGE_FILTER_SPAN, null))
                .releaseTime(System.currentTimeMillis())
                .needWashed(false)
                .needParsed(false)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.comment)
                .build();
        requestRecord.setDownload(false);
        requestRecord.setSkipPipeline(true);
        addBizTags(requestRecord);
        addCommentFilterTag(requestRecord);
        requestRecord.getHttpRequest().addExtra("searchKwSourceUrl", "https://s.dahe.cn/?kw=%s&from=2");

        // Support record: fetches the keyword list for this domain from the meta service.
        CrawlerRequestRecord keywordSupportRecord = CrawlerRequestRecord.builder()
                .startPageRequest("dahe_forum_keyword", turnPageItem)
                .httpUrl("http://192.168.1.215:9599/v1/meta/" + DOMAIN + "/keys?site=forum")
                .requestLabelTag(supportSource)
                .requestLabelTag(internalDownload)
                .build();

        DevCrawlerController.builder()
                .triggerInfo(DOMAIN, DOMAIN, System.currentTimeMillis(), DOMAIN)
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(DOMAIN))
                .consoleResultPipeline() // console output
//                .fileResultPipeline("D:\\chance\\log\\tets.log",true) // file output
                .requestRecord(requestRecord)
                .supportRecord(keywordSupportRecord)
                .build()
                .start();
    }

    /**
     * Starts a crawl from one fixed search-result URL (a single URL-encoded keyword),
     * labelled to produce article and interaction results.
     */
    private static void keywordItemRecord() {
        CrawlerRequestRecord requestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN, turnPage)
                .domain(DOMAIN)
                .httpUrl("https://s.dahe.cn/?kw=%E5%A4%A7%E6%B2%B3%E7%BD%91&from=1")
                .httpConfig(HttpConfig.me(DOMAIN))
                .filter(CrawlerEnum.CrawlerRecordFilter.dateRange)
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(DATE_RANGE_FILTER_SPAN, null))
                .releaseTime(System.currentTimeMillis())
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
//                .resultLabelTag(CrawlerEnum.CrawlerDataType.comment)
                .build();
        addBizTags(requestRecord);
        addCommentFilterTag(requestRecord);

        DevCrawlerController.builder()
                .triggerInfo(DOMAIN, DOMAIN, System.currentTimeMillis(), DOMAIN)
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(DOMAIN))
                .consoleResultPipeline() // console output
//                .fileResultPipeline("D:\\chance\\log\\tets.log",true) // file output
                .requestRecord(requestRecord)
                .build()
                .start();
    }

    /**
     * Adds the domain, site and site-biz business tags shared by every start record.
     *
     * @param requestRecord the start record to tag
     */
    private static void addBizTags(CrawlerRequestRecord requestRecord) {
        requestRecord.tagsCreator().bizTags().addDomain(DOMAIN);
        requestRecord.tagsCreator().bizTags().addSite(SITE);
        requestRecord.tagsCreator().bizTags().addSiteBiz(SITE_BIZ);
    }

    /**
     * Builds a key-filtered record keyed {@code filter-<domain>-<site>-queue} and stores its
     * JSON form on the start record's result category tag under {@link #COMMENT_FILTER_TAG}.
     *
     * @param requestRecord the start record that receives the serialized filter record
     */
    private static void addCommentFilterTag(CrawlerRequestRecord requestRecord) {
        CrawlerRecord commentFilter = new CrawlerRequestRecord();
        commentFilter.setFilter(key);
        commentFilter.addFilterInfo(FilterUtils.memoryFilterKeyInfo(
                StringUtils.joinWith("-", "filter", DOMAIN, SITE, "queue")));
//        commentFilter.addFilterInfo(FilterUtils.dateRangeFilterInfo(24,null));
        requestRecord.tagsCreator().resultTags().getCategoryTag()
                .addKVTag(COMMENT_FILTER_TAG, JSON.toJSONString(commentFilter));
    }

}
